Saul Diez-Guerra
there ain't no such thing as a free speed-up
__slots__
Libraries and tools that help you circumvent The Safety of Python!
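A minimal sketch (not from the talk) of the __slots__ trick mentioned above: declaring slots trades dynamic attribute assignment for a more compact instance layout and slightly faster attribute access.
class Point(object):
    __slots__ = ('x', 'y')  # no per-instance __dict__

    def __init__(self, x, y):
        self.x = x
        self.y = y

p = Point(0.25, 0.25)
# p.z = 1.0 would raise AttributeError, since 'z' is not a declared slot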
Polygon area based on its coordinates
*Time measurements are just for reference and are a biased, reductionistic tool that shouldn't be blindly trusted.*
*Especially when you use a single piece of code to test many methods of optimization at once.*
"""
XX
XX
XX
XX
"""
def diag_gen(length):
    # Build a closed staircase-shaped polygon: climb up and to the right one
    # unit at a time, cap the top, then walk back down to the starting point.
    points = [(0.25, 0.25)]
    for _ in xrange(1, length + 1):
        points.append((points[-1][0] + 1, points[-1][1]))
        points.append((points[-1][0], points[-1][1] + 1))
    points.append((points[-1][0], points[-1][1] + 1))
    points.append((points[-1][0] - 1, points[-1][1]))
    points.append((points[-1][0], points[-1][1] - 1))
    for _ in xrange(1, length):
        points.append((points[-1][0] - 1, points[-1][1]))
        points.append((points[-1][0], points[-1][1] - 1))
    points.append((0.25, 0.25))
    return points
def test_diag_gen(points):
    # The coordinate sum may change by at most one between consecutive
    # vertices, and the polygon must be closed.
    res = 0
    for point in points:
        assert abs(point[0] + point[1] - res) <= 1
        res = point[0] + point[1]
    assert points[0] == points[-1]

test_diag_gen(diag_gen(200))
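The area() function below is the shoelace formula applied to the closed vertex list:

A = (1/2) * |sum_i (x_i * y_{i+1} - x_{i+1} * y_i)|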
def area(points):
    acc = 0
    for i in xrange(len(points) - 1):
        acc += points[i][0] * points[i + 1][1] - points[i + 1][0] * points[i][1]
    return abs(acc) / 2
assert area(diag_gen(32)) == 64.0
polygon = diag_gen(2*10**5)
%timeit area(polygon)
1 loops, best of 3: 252 ms per loop
SciPy, besides naming the "stack" and the "ecosystem" that contain amazing things such as SciPy itself (hah), NumPy, IPython, pandas, PyTables, matplotlib..., is also the name of a package filled with algorithms and convenience functions.
N-dimensional arrays, backed by C and Fortran for speed, just like SciPy. "Part" of SciPy, but packaged on its own because of its standalone utility.
import numpy as np
polygon_np = np.array(polygon, dtype=np.float64)
def area_np(arr):
    # Vectorized shoelace: slicing pairs each vertex with its successor.
    return abs((arr[:-1,0] * arr[1:,1] - arr[:-1,1] * arr[1:,0]).sum()) / 2
assert area_np(polygon_np) == 4.*10**5
print " Python Area:".ljust(21),
%timeit area(polygon)
print "Numpy Area:".ljust(20),
%timeit area_np(polygon_np)
Python Area:        1 loops, best of 3: 257 ms per loop
Numpy Area:         100 loops, best of 3: 8.84 ms per loop
PyCon ES 2013 - "Mi primer numba" (My first Numba)
import numba as nb
area_nb = nb.jit(area)
area_np_nb = nb.jit(area_np)
print " Python Area:".ljust(21),
%timeit area(polygon)
print "Python Area Numba:".ljust(20),
%timeit area_nb(polygon)
print "Python NumPy:".ljust(20),
%timeit area_np(polygon_np)
print "Python NumPy Numba:".ljust(20),
%timeit area_np_nb(polygon_np)
Python Area:         1 loops, best of 3: 257 ms per loop
Python Area Numba:   1 loops, best of 3: 404 ms per loop
Python NumPy:        100 loops, best of 3: 8.68 ms per loop
Python NumPy Numba:  100 loops, best of 3: 8.56 ms per loop
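Numba doesn't help the list-of-tuples version here: the JIT most likely falls back to object mode on untyped Python containers, adding overhead instead of removing it. A sketch (assuming a reasonably recent Numba, not necessarily the one used in these timings) of a typed loop over the float64 array, where nopython mode can kick in:
@nb.jit("f8(f8[:, :])", nopython=True)
def area_loop_nb(arr):
    # Plain shoelace loop; with a typed array argument Numba compiles it to native code.
    acc = 0.0
    for i in range(arr.shape[0] - 1):
        acc += arr[i, 0] * arr[i + 1, 1] - arr[i + 1, 0] * arr[i, 1]
    return abs(acc) / 2.0

assert area_loop_nb(polygon_np) == 4. * 10 ** 5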
@nb.vectorize([nb.i8(nb.i8), nb.i4(nb.i4)])
def power(x):
    return x * x + 1
power_np = np.vectorize(lambda x: x * x + 1)
arr = np.arange(1000000)
%timeit -n 5 power_np(arr)
%timeit -n100 np.square(arr) + 1
%timeit -n100 power(arr)
5 loops, best of 3: 213 ms per loop
100 loops, best of 3: 5.58 ms per loop
100 loops, best of 3: 5.06 ms per loop
Originally similar to Numba; development now focuses on manual optimization of functions, and it has moved from translation to LLVM (deprecated) to C and OpenMP (gcc).
Experimental support for CUDA.
import parakeet
area_pk = parakeet.jit(area)
area_np_pk = parakeet.jit(area_np)
assert area_pk(polygon) == 4.*10**5
assert area_np_pk(polygon_np) == 4.*10**5
%timeit area_pk(polygon)
%timeit area_np_pk(polygon_np)
1 loops, best of 3: 491 ms per loop
1000 loops, best of 3: 1.28 ms per loop
import numexpr as ne
def area_ne(arr):
    # Bind the slices to local names so numexpr can look them up in the expression string.
    arrX, arrx, arrY, arry = arr[:-1,0], arr[1:,0], arr[:-1,1], arr[1:,1]
    return abs(ne.evaluate('sum(arrX * arry - arrY * arrx)')) / 2
assert area_ne(polygon_np) == 4.*10**5
%timeit area_np(polygon_np)
%timeit area_ne(polygon_np)
100 loops, best of 3: 8.87 ms per loop
100 loops, best of 3: 2.54 ms per loop
Defines, optimizes and evaluates array expressions. Seamless GPU. CUDA. Native optimizations. Really specialized.
Some tricky, platform-dependent dependencies; Anaconda solves that.
import os
os.environ['DYLD_FALLBACK_LIBRARY_PATH'] = '/Users/saul/anaconda/lib:' + os.environ.get('DYLD_FALLBACK_LIBRARY_PATH', '')
from theano import tensor as T
from theano import function
def area_th(arr):
    # Build the symbolic shoelace expression, compile it, and evaluate it on the slices.
    x, y, z, j = T.vectors('x', 'y', 'z', 'j')
    expr = T.sum((x * y - z * j) * 0.5)
    return abs(function([x, y, z, j], expr)(arr[:-1,0], arr[1:,1], arr[:-1,1], arr[1:,0]))
assert area_th(polygon_np) == 400000.
%timeit -n 10 area_np(polygon_np)
%timeit -n 10 area_ne(polygon_np)
%timeit -n 10 area_th(polygon_np)
10 loops, best of 3: 10.6 ms per loop
10 loops, best of 3: 2.53 ms per loop
10 loops, best of 3: 41.5 ms per loop
A JIT-compiling implementation of the Python language spec, but not (C)Python. Pretty mature, lots of support; not entirely there yet in some regards, but highly compatible.
pypy » ./bin/pypy shoelace.py
ta: 0.029669 s
cpython » python shoelace.py
pp: 2.539226 s
NumPy on PyPy requires a fork of the original to make it work: http://buildbot.pypy.org/numpy-status/latest.html
pypy » ./bin/pypy shoelace.py
pp: 0.030495 s
pypy » ./bin/pypy shoelace_np.py
np: 0.213656 s
cpython » python shoelace.py
pp: 2.539226 s
cpython » python shoelace_np.py
np: 0.077828 s
from timeit import Timer
polygon_3 = diag_gen(2*10**5)
polygon_3_np = np.array(diag_gen(2*10**5))
ta = Timer('area(polygon_3)', 'from __main__ import area, polygon_3')
tanp = Timer('area_np(polygon_3_np)', 'from __main__ import area_np, polygon_3_np')
print "ta:\t", round(min(ta.repeat(3, 10)), 6), "s"
print "tanp:\t", round(min(tanp.repeat(3, 10)), 6), "s"
ta:     2.590854 s
tanp:   0.085523 s
jython » ./bin/jython shoelace.py
ta: 2.229 s
No numpy :(
""""
python: 2.162128 s
jython: 1.967000 s
npypy : 0.213656 s
numpy : 0.077828 s
pypy : 0.030495 s
""";
Cloud, big data infrastructure and analytics library
https://github.com/serge-sans-paille/pythran
Mehdi Amini, PyData SV 2013 - http://www.youtube.com/watch?v=KKoVeiQOmik
#pythran export dprod(int list, int list)
def dprod(l0, l1):
    return sum(x * y for x, y in zip(l0, l1))
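A usage sketch, assuming the snippet above is saved as dprod.py: Pythran compiles the annotated module into a native extension that imports like a regular Python module.
» pythran dprod.py
» python -c "import dprod; print dprod.dprod(range(10), range(10))"
285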
C extensions, Cython, ctypes, CFFI.
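To close that list, a tiny ctypes illustration (not from the talk; assumes a Unix-like system where the C math library can be located): calling a C function directly, without writing an extension module.
import ctypes
import ctypes.util

# Load the system math library and declare cos()'s C signature before calling it.
libm = ctypes.CDLL(ctypes.util.find_library("m"))
libm.cos.restype = ctypes.c_double
libm.cos.argtypes = [ctypes.c_double]

print libm.cos(0.0)  # -> 1.0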